package ve.com.ucv.changedetection.model;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

/**
 * Helper that opens a connection to a URL and parses the returned content
 * into a W3C DOM tree using JTidy.
 *
 * User: josmaguirre
 * Date: 4/12/12
 * Time: 17:57
 */
public class TidyImplementation {
    /**
     * Opens a connection to the given URL and parses the response body into a
     * DOM tree with JTidy.
     *
     * <p>The response stream is always closed before this method returns,
     * whether or not parsing succeeded.
     *
     * @param url textual URL of the page to fetch and parse
     * @return the DOM tree produced by JTidy, or {@code null} if the URL is
     *         malformed or an I/O error occurs while connecting or opening
     *         the stream (the stack trace is printed in that case)
     */
    public static Document transformXML(String url){
        Tidy tidyObject = new Tidy();

        // SETTING TIDY CONFIGURATIONS
        // Diagnostics are silenced and output is forced; see the JTidy
        // configuration reference for the exact meaning of each option.
        tidyObject.setAsciiChars(true);
        tidyObject.setEscapeCdata(true);
        tidyObject.setShowErrors(0);
        tidyObject.setShowWarnings(false);
        tidyObject.setForceOutput(true);
        tidyObject.setHideComments(true);
        tidyObject.setPrintBodyOnly(true);

        try{
            URLConnection urlConnection = new URL(url).openConnection();
            InputStream pageStream = urlConnection.getInputStream();
            try{
                // No output stream is supplied (second argument is null);
                // only the DOM result is used.
                return tidyObject.parseDOM(pageStream, null);
            }finally{
                // Release the stream (and its connection) on every path.
                closeQuietly(pageStream);
            }
        }catch(IOException e){
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Closes the stream, ignoring any failure: a close error must not
     * discard a document that was already parsed successfully.
     */
    private static void closeQuietly(InputStream stream){
        try{
            stream.close();
        }catch(IOException ignored){
            // Nothing useful can be done if closing fails.
        }
    }

}
